;;;; HSV - Hierarchical Separated Values
;;;;
;;;; A text-based file format and streaming protocol using ASCII control characters.
;;;; Unlimited nesting (like JSON). No escaping required. Binary data supported.
;;;;
;;;; Copyright 2026 Danslav Slavenskoj, Lingenic LLC
;;;; License: CC0 1.0 - Public Domain
;;;; https://creativecommons.org/publicdomain/zero/1.0/
;;;; You may use this code for any purpose without attribution.
;;;;
;;;; Spec: https://hsvfile.com
;;;; Repo: https://github.com/LingenicLLC/HSV

(defpackage :hsv
  (:use :cl)
  (:export #:parse
           #:+soh+ #:+stx+ #:+etx+ #:+eot+
           #:+so+ #:+si+ #:+dle+
           #:+fs+ #:+gs+ #:+rs+ #:+us+
           #:document-header
           #:document-records
           #:get-value
           #:has-key))

(in-package :hsv)

;;; Control characters
(defconstant +soh+ (code-char #x01))  ; Start of Header
(defconstant +stx+ (code-char #x02))  ; Start of Text (data block)
(defconstant +etx+ (code-char #x03))  ; End of Text
(defconstant +eot+ (code-char #x04))  ; End of Transmission
(defconstant +so+  (code-char #x0e))  ; Shift Out (start nested)
(defconstant +si+  (code-char #x0f))  ; Shift In (end nested)
(defconstant +dle+ (code-char #x10))  ; Data Link Escape (binary mode)
(defconstant +fs+  (code-char #x1c))  ; File/Record Separator
(defconstant +gs+  (code-char #x1d))  ; Group/Array Separator
(defconstant +rs+  (code-char #x1e))  ; Record/Property Separator
(defconstant +us+  (code-char #x1f))  ; Unit/Key-Value Separator

;;; Document structure
(defstruct document
  (header nil)
  (records nil))

;;; Split string by separator, respecting SO/SI nesting
(defun split-respecting-nesting (text sep)
  (let ((parts '())
        (current (make-string-output-stream))
        (depth 0))
    (loop for c across text do
      (cond
        ((char= c +so+)
         (incf depth)
         (write-char c current))
        ((char= c +si+)
         (decf depth)
         (write-char c current))
        ((and (char= c sep) (zerop depth))
         (push (get-output-stream-string current) parts)
         (setf current (make-string-output-stream)))
        (t
         (write-char c current))))
    (let ((final (get-output-stream-string current)))
      (when (or (plusp (length final)) parts)
        (push final parts)))
    (nreverse parts)))

;;; Extract DLE+STX...DLE+ETX binary sections
(defun extract-binary-sections (text)
  (let ((result (make-string-output-stream))
        (binaries (make-hash-table :test 'equal))
        (i 0)
        (len (length text))
        (placeholder-count 0))
    (loop while (< i len) do
      (cond
        ((and (char= (char text i) +dle+)
              (< (1+ i) len)
              (char= (char text (1+ i)) +stx+))
         ;; Start of binary section
         (let ((j (+ i 2))
               (binary-data (make-string-output-stream)))
           (loop while (< j len) do
             (cond
               ((and (char= (char text j) +dle+)
                     (< (1+ j) len))
                (cond
                  ((char= (char text (1+ j)) +etx+)
                   ;; End of binary section
                   (let ((placeholder (format nil "~CBINARY~D~C" #\Null placeholder-count #\Null)))
                     (setf (gethash placeholder binaries) (get-output-stream-string binary-data))
                     (write-string placeholder result)
                     (incf placeholder-count)
                     (setf i (+ j 2))
                     (return)))
                  ((char= (char text (1+ j)) +dle+)
                   ;; Escaped DLE
                   (write-char +dle+ binary-data)
                   (incf j 2))
                  (t
                   (write-char (char text j) binary-data)
                   (incf j))))
               (t
                (write-char (char text j) binary-data)
                (incf j))))
           (when (>= j len)
             ;; Unterminated binary
             (write-char (char text i) result)
             (incf i))))
        (t
         (write-char (char text i) result)
         (incf i))))
    (values (get-output-stream-string result) binaries)))

;;; Restore binary placeholders
(defun restore-binaries (value binaries)
  (let ((result value))
    (maphash (lambda (placeholder data)
               (loop for pos = (search placeholder result)
                     while pos do
                     (setf result (concatenate 'string
                                               (subseq result 0 pos)
                                               data
                                               (subseq result (+ pos (length placeholder)))))))
             binaries)
    result))

;;; Parse a value (may be string, array, or nested object)
(defun parse-value (value binaries)
  (let ((restored (restore-binaries value binaries)))
    (cond
      ;; Check for nested structure (SO at start, SI at end)
      ((and (>= (length restored) 2)
            (char= (char restored 0) +so+)
            (char= (char restored (1- (length restored))) +si+))
       (parse-object (subseq restored 1 (1- (length restored))) binaries))
      ;; Check for array
      ((position +gs+ restored)
       (mapcar (lambda (p) (parse-value p binaries))
               (split-respecting-nesting restored +gs+)))
      ;; Plain string
      (t restored))))

;;; Parse an object from RS-separated properties
(defun parse-object (content binaries)
  (let ((obj (make-hash-table :test 'equal)))
    (dolist (prop (split-respecting-nesting content +rs+))
      (let ((parts (split-respecting-nesting prop +us+)))
        (when (>= (length parts) 2)
          (let* ((key (first parts))
                 (val (format nil "~{~A~^~C~}" (rest parts) +us+)))
            (setf (gethash key obj) (parse-value val binaries))))))
    obj))

;;; Main parse function
(defun parse (text)
  (multiple-value-bind (processed-text binaries)
      (extract-binary-sections text)
    (let ((doc (make-document))
          (i 0)
          (len (length processed-text)))
      (loop while (< i len) do
        (let ((c (char processed-text i)))
          (cond
            ((char= c +soh+)
             ;; Find STX
             (let ((stx-pos (position +stx+ processed-text :start (1+ i))))
               (if (null stx-pos)
                   (incf i)
                   (progn
                     ;; Parse header
                     (setf (document-header doc)
                           (parse-object (subseq processed-text (1+ i) stx-pos) binaries))
                     ;; Find ETX
                     (let ((etx-pos (position +etx+ processed-text :start (1+ stx-pos))))
                       (if (null etx-pos)
                           (setf i (1+ stx-pos))
                           (progn
                             ;; Parse records
                             (dolist (record (split-respecting-nesting
                                              (subseq processed-text (1+ stx-pos) etx-pos)
                                              +fs+))
                               (let ((obj (parse-object record binaries)))
                                 (when (plusp (hash-table-count obj))
                                   (push obj (document-records doc)))))
                             (setf i (1+ etx-pos)))))))))
            ((char= c +stx+)
             ;; Find ETX
             (let ((etx-pos (position +etx+ processed-text :start (1+ i))))
               (if (null etx-pos)
                   (incf i)
                   (progn
                     ;; Parse records
                     (dolist (record (split-respecting-nesting
                                      (subseq processed-text (1+ i) etx-pos)
                                      +fs+))
                       (let ((obj (parse-object record binaries)))
                         (when (plusp (hash-table-count obj))
                           (push obj (document-records doc)))))
                     (setf i (1+ etx-pos))))))
            (t (incf i)))))
      ;; Reverse records to maintain order
      (setf (document-records doc) (nreverse (document-records doc)))
      doc)))

;;; Helper: Get value from object by key
(defun get-value (obj key)
  (when (hash-table-p obj)
    (gethash key obj)))

;;; Helper: Check if object has key
(defun has-key (obj key)
  (when (hash-table-p obj)
    (nth-value 1 (gethash key obj))))
